Data Visualization in Pandas¶

In [1]:
#importing the libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

Plotting graphs with the plot() method¶

In [2]:
# Simulate a random walk: the running total of 1000 standard-normal draws,
# stored as a Series with the default 0..999 integer index.
data = pd.Series(np.cumsum(np.random.randn(1000)))
data
Out[2]:
0      -0.584127
1      -1.377490
2      -0.882970
3       0.461625
4       0.822463
         ...    
995   -28.696661
996   -28.190823
997   -29.000591
998   -28.343885
999   -29.862400
Length: 1000, dtype: float64

1. Line Plot¶

In [3]:
# Line plot of the Series against its index ("line" is the default kind).
data.plot(kind="line")
plt.show()

Another Example: Line Plot of a DataFrame¶

In [8]:
# Four independent random walks (columns a-d), 100 steps each: draw
# standard-normal increments, then take the running total down each column.
df1 = pd.DataFrame(
    np.random.randn(100, 4),
    columns=list("abcd"),
).cumsum()
df1
Out[8]:
a b c d
0 -0.955439 0.221885 -0.151688 0.727239
1 -2.242434 -0.084837 0.482656 -0.276666
2 -1.637941 0.278600 -0.780838 0.148659
3 -2.197805 0.620449 0.493361 0.993773
4 -1.378125 -0.694380 -0.383419 -0.634611
... ... ... ... ...
95 3.821256 -12.634502 -8.880621 -4.884173
96 3.650171 -11.791885 -9.615944 -4.033960
97 4.060936 -12.506512 -8.989279 -4.387667
98 4.774044 -12.897997 -10.557663 -3.829007
99 4.483041 -12.122258 -10.096502 -3.049107

100 rows × 4 columns

In [9]:
# One line per column, all drawn on the same axes.
df1.plot(kind="line")
plt.show()

2. Bar Graph¶

In [12]:
# A 7x3 frame of uniform random values in [0, 1), with columns a, b, c.
df2 = pd.DataFrame(
    np.random.rand(7, 3),
    columns=list("abc"),
)
df2
Out[12]:
a b c
0 0.273284 0.169643 0.899382
1 0.564891 0.266047 0.257294
2 0.339101 0.343239 0.644331
3 0.841610 0.953456 0.942113
4 0.064272 0.341218 0.710116
5 0.517114 0.386171 0.055171
6 0.860341 0.688838 0.697439
In [13]:
# Grouped vertical bars: one group per row, one bar per column.
df2.plot(kind="bar")
plt.show()

3. Histogram¶

In [29]:
# Read the Iris data file. It is parsed without a header row (header=None),
# so the column names are supplied here rather than taken from the file.
df = pd.read_csv(
    "iris.data",
    header=None,
    names=["sepal_length", "sepal_width", "petal_length", "petal_width", "species"],
)
In [30]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
Out[30]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
In [31]:
# Overlaid histograms of the columns; alpha=0.5 makes the bars
# semi-transparent so overlapping distributions stay visible.
df.plot(kind="hist", alpha=0.5)
plt.show()

4. Boxplot Charts¶

In [32]:
# Box-and-whisker plot, one box per column.
df.plot(kind="box")
plt.show()
In [33]:
# Customise the box plot: give each element of the boxes its own colour.
colors = dict(boxes='Red', whiskers='blue', medians='Black', caps='Green')
df.plot(kind="box", color=colors)
plt.show()

Plotting Horizontally¶

In [34]:
# vert=False lays the boxes out horizontally instead of vertically.
df.plot(kind="box", vert=False)
plt.show()

5. Area Charts¶

In [35]:
# Rebind df to a fresh 10x4 frame of uniform random values in [0, 1),
# with columns a-d, and preview its first five rows.
df = pd.DataFrame(
    np.random.rand(10, 4),
    columns=list("abcd"),
)
df.head(n=5)
Out[35]:
a b c d
0 0.937786 0.418841 0.799605 0.119481
1 0.233060 0.488327 0.558990 0.271129
2 0.747060 0.356159 0.446656 0.736579
3 0.318732 0.837289 0.092116 0.409191
4 0.959139 0.061842 0.452362 0.846347
In [36]:
# Stacked area chart of all columns (stacking is the area-plot default,
# spelled out here for clarity).
df.plot(kind="area", stacked=True)
plt.show()
In [37]:
# Area chart of a single column: select column 'c' and plot only that Series.
df["c"].plot(kind="area", stacked=True)
plt.show()

6. Scatter Plots¶

In [26]:
# Scatter plot of column 'b' (y-axis) against column 'a' (x-axis).
df.plot(kind="scatter", x="a", y="b")
plt.show()
In [38]:
# Reload the Iris data into df (parsed without a header row, so the column
# names are supplied explicitly).
df = pd.read_csv(
    "iris.data",
    header=None,
    names=["sepal_length", "sepal_width", "petal_length", "petal_width", "species"],
)

# Sepal width against sepal length; each marker's size is that row's
# petal_length multiplied by 50.
df.plot(
    kind="scatter",
    x="sepal_length",
    y="sepal_width",
    s=df["petal_length"].mul(50),
)
plt.show()

7. Pie Charts¶

In [40]:
# Mean petal width for each species: group the rows by 'species', then
# average the 'petal_width' column within each group.
df_avg = df.groupby("species")["petal_width"].mean()
df_avg
Out[40]:
species
Iris-setosa        0.244
Iris-versicolor    1.326
Iris-virginica     2.026
Name: petal_width, dtype: float64
In [41]:
# Pie chart of the per-species means held in df_avg.
# Ending the cell with plt.show() renders the figure and keeps the Axes
# repr (e.g. "<AxesSubplot:ylabel='petal_width'>") out of the cell output,
# consistent with every other plotting cell in this notebook.
df_avg.plot.pie()
plt.show()

8. Density Chart¶

In [42]:
# Kernel density estimate curves for the columns; `density` is pandas'
# alias for the `kde` plotting method.
df.plot.density()
plt.show()